This script uses a combination of the DEP-package and a limma-script that can handle paired samples (LA and RA from same horse). The DEP-package uses FDR-tools to generate q-values, which I have included in my own limma-script.

Required R libraries

if (!require("pacman")) install.packages("pacman")
## Loading required package: pacman
pacman::p_load("edgeR", "readr", "readxl", "biomaRt", "magrittr", "tibble", "stringr", 
               "ggplot2", "data.table", "patchwork", "openxlsx", "dplyr", "missForest", 
               "RColorBrewer", "limma", "DEqMS", "preprocessCore", "DEP", 
               "SummarizedExperiment", "Metrics", "fdrtool", "aamisc", "sva")

# install aamisc package for MDS and Volcano plots
# Commented out for future use:
# pacman::p_load("qvalue", "rain", "limma", "devtools")
# url <- "https://cran.r-project.org/src/contrib/Archive/HarmonicRegression/HarmonicRegression_1.0.tar.gz"
# pkgFile <- "HarmonicRegression_1.0.tar.gz"
# download.file(url = url, destfile = pkgFile)
# install.packages(pkgs=pkgFile, type="source", repos=NULL)
# file.remove(pkgFile)
# pacman::p_load_gh("altintasali/aamisc")

# Colours for publication
publication_colors <- c("Control" = "#285291", "Metformin" = "#7C1516", "Sham" = "#9B9B9B")

Read data and filtrer

# Load count data from a local file and metadata through Excel
# Define file paths
count_file <- "../../../data/count/report.unique_genes_matrix.tsv"
meta_file <- "../../../data/metadata/meta_proteomics.xlsx"
geneinfo_file <- "../../../data/gene_annotation/horse_gene_annotation.tsv.gz"

# Read count data
count <- readr::read_delim(count_file)
## Rows: 2967 Columns: 74
## -- Column specification --------------------------------------------------------
## Delimiter: "\t"
## chr  (1): Genes
## dbl (73): D:\Mass_spectrometry\Raw_data\Joakim\Simon Horse second run2\1A_RA...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read metadata
meta <- readxl::read_excel(meta_file)

# Remove unnecessary or blank columns from MS machine
count <- count %>% select(-matches("D:\\\\Mass_spectrometry\\\\Raw_data\\\\Joakim\\\\Simon Horse second run2\\\\10A_RA11_1_24830.d"))

# Read and clean gene annotation file
geneinfo <- fread(geneinfo_file)
setnames(geneinfo, old = names(geneinfo), new = c("ENSEMBL", "ENSEMBLv", "Description_detailed", 
                                                  "Chr", "Start", "End", "Strand", "GENENAME", "ENTREZID", "Description"))
geneinfo <- geneinfo %>% select(-ENSEMBLv, -Description_detailed) %>% distinct(ENSEMBL, .keep_all = TRUE)

# Merge gene annotation with count data
annot <- merge(count[, "Genes", drop = FALSE], geneinfo, by.x = "Genes", by.y = "GENENAME", all.x = TRUE)

# Clean count data
count <- count %>% remove_rownames() %>% column_to_rownames(var="Genes")
cleaned_names <- gsub("D:\\\\Mass_spectrometry\\\\Raw_data\\\\Joakim\\\\Simon Horse second run2\\\\|_.*", "", colnames(count))
colnames(count) <- cleaned_names
meta$`Sample ID` <- cleaned_names

#subset data to only RA terminal and baseline samples
meta <- meta[meta$Region %in% c("RA"), ]
count <- count[, colnames(count) %in% meta$`Sample ID`]


# Remove low count samples from Count and Meta
samples_to_remove <- c("25C", "12A", "22A", "19A")
count <- count[, !colnames(count) %in% samples_to_remove]
meta <- meta %>% filter(!`Sample ID` %in% samples_to_remove)

# Calculate the number of proteins in each sample
num_proteins <- colSums(!is.na(count))
num_proteins <- num_proteins[meta$`Sample ID`]  # Ensure matching order with meta

# Define color scheme for groups
KUalt <- c("#7C1516", "#285291", "#434343", "#999999")  # Custom color palette
group_colors <- setNames(KUalt[1:length(unique(meta$Group))], unique(meta$Group))
bar_colors <- group_colors[meta$Group]

# Plot the number of proteins in each sample before filtering
par(mfrow = c(1, 1))
barplot(num_proteins, 
        main = "Number of Proteins in Each Sample\nBefore Filtering",
        xlab = "Sample",
        ylab = "Number of Proteins",
        col = bar_colors,
        border = "black",
        ylim = c(0, 2500),
        las = 2)

# Print the total number of proteins before filtering
cat("The total number of proteins before filtering was", max(num_proteins), "\n")
## The total number of proteins before filtering was 2610
# Identify genes with less than three valid values in any condition
invalid_genes <- unique(unlist(lapply(unique(meta$Condition), function(condition) {
  condition_columns <- meta$`Sample ID`[meta$Condition == condition]
  condition_count <- count[, condition_columns, drop = FALSE]
  rownames(condition_count)[rowSums(!is.na(condition_count)) < 3]
})))

# Remove invalid genes from the original count table
filtered_count <- count[!rownames(count) %in% invalid_genes, ]

# Calculate the number of proteins in each sample after filtering
num_proteins_after <- colSums(!is.na(filtered_count))
num_proteins_after <- num_proteins_after[meta$`Sample ID`]  # Ensure matching order with meta

# Plot settings
par(mfrow = c(1, 2))

# First plot: Number of proteins in each sample before filtering
barplot(num_proteins, 
        main = "Number of Proteins in Each Sample\nBefore Filtering",
        xlab = "Sample",
        ylab = "Number of Proteins",
        col = bar_colors,
        border = "black",
        ylim = c(0, 2500),
        las = 2)

# Second plot: Number of proteins in each sample after filtering
barplot(num_proteins_after, 
        main = "Number of Proteins in Each Sample\nAfter Filtering",
        xlab = "Sample",
        ylab = "Number of Proteins",
        col = bar_colors,
        border = "black",
        ylim = c(0, 2500),
        las = 2)

# Print the total and average number of proteins before and after filtering
cat("The total number of proteins before filtering was", max(num_proteins), "\n")
## The total number of proteins before filtering was 2610
cat("The average number of proteins before filtering was", mean(num_proteins), "\n")
## The average number of proteins before filtering was 2168.578
cat("The total number of proteins after filtering was", max(num_proteins_after), "\n")
## The total number of proteins after filtering was 1288
cat("The average number of proteins after filtering was", mean(num_proteins_after), "\n")
## The average number of proteins after filtering was 1258.933

DEP-package https://bioconductor.org/packages/release/bioc/vignettes/DEP/inst/doc/DEP.html

Generate a SummarizedExperiment object

# Generate proxy columns similar to DEP's sample data to make the structure compatible.
proxy_column_names <- c("Protein.IDs", "Majority.protein.IDs", "Protein.names", "Gene.names", 
                        "Fasta.headers", "Peptides", "Razor...unique.peptides", "Unique.peptides", 
                        "Only.identified.by.site", "Reverse", "Potential.contaminant")
proxy_df <- data.frame(matrix(NA, nrow=nrow(filtered_count), ncol=length(proxy_column_names)))
colnames(proxy_df) <- proxy_column_names

# Assign row names to specific columns
rownames_to_columns <- rownames(filtered_count)
proxy_df[1:4] <- lapply(1:4, function(i) rownames_to_columns)

# Merge and rename sample columns
merged_df <- cbind(proxy_df, filtered_count)
colnames(merged_df)[12:ncol(merged_df)] <- paste0("LFQ.intensity.", colnames(filtered_count))

# Reorder columns
lfq_columns <- grep("^LFQ.intensity", colnames(merged_df), value = TRUE)
new_order <- c(proxy_column_names[1:8], lfq_columns, proxy_column_names[9:11])
data <- merged_df[, new_order]

print(colnames(data))
##  [1] "Protein.IDs"             "Majority.protein.IDs"   
##  [3] "Protein.names"           "Gene.names"             
##  [5] "Fasta.headers"           "Peptides"               
##  [7] "Razor...unique.peptides" "Unique.peptides"        
##  [9] "LFQ.intensity.1A"        "LFQ.intensity.1B"       
## [11] "LFQ.intensity.2A"        "LFQ.intensity.2B"       
## [13] "LFQ.intensity.3A"        "LFQ.intensity.3B"       
## [15] "LFQ.intensity.4A"        "LFQ.intensity.4B"       
## [17] "LFQ.intensity.5A"        "LFQ.intensity.5B"       
## [19] "LFQ.intensity.6A"        "LFQ.intensity.6B"       
## [21] "LFQ.intensity.7A"        "LFQ.intensity.7B"       
## [23] "LFQ.intensity.8A"        "LFQ.intensity.8B"       
## [25] "LFQ.intensity.9A"        "LFQ.intensity.9B"       
## [27] "LFQ.intensity.10A"       "LFQ.intensity.10B"      
## [29] "LFQ.intensity.11A"       "LFQ.intensity.11B"      
## [31] "LFQ.intensity.12B"       "LFQ.intensity.13A"      
## [33] "LFQ.intensity.13B"       "LFQ.intensity.14A"      
## [35] "LFQ.intensity.14B"       "LFQ.intensity.15A"      
## [37] "LFQ.intensity.15B"       "LFQ.intensity.16A"      
## [39] "LFQ.intensity.16B"       "LFQ.intensity.17A"      
## [41] "LFQ.intensity.17B"       "LFQ.intensity.18A"      
## [43] "LFQ.intensity.18B"       "LFQ.intensity.19B"      
## [45] "LFQ.intensity.20A"       "LFQ.intensity.20B"      
## [47] "LFQ.intensity.22B"       "LFQ.intensity.23A"      
## [49] "LFQ.intensity.23B"       "LFQ.intensity.24A"      
## [51] "LFQ.intensity.24B"       "LFQ.intensity.25A"      
## [53] "LFQ.intensity.25B"       "Only.identified.by.site"
## [55] "Reverse"                 "Potential.contaminant"
#Are there any duplicated gene names?
data$Gene.names %>% duplicated() %>% any()
## [1] FALSE
# Make unique names using the annotation in the "Gene.names" column as primary names and the annotation in "Protein.IDs" as name for those that do not have an gene name.
data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")

# Generate a SummarizedExperiment object using an experimental design
LFQ_columns <- grep("LFQ.", colnames(data_unique)) # get LFQ column numbers

# Prepare the metadata for DEP analysis
experimental_design <- data.frame(
  label = colnames(filtered_count),
  condition = meta$Condition,
  replicate = NA  # Initialize replicate column with NA
)

# Assign replicate numbers within each condition
experimental_design <- experimental_design %>%
  group_by(condition) %>%
  mutate(replicate = row_number()) %>%
  ungroup()

# Set row names
row.names(experimental_design) <- experimental_design$label
## Warning: Setting row names on a tibble is deprecated.
# Ensure the row names of experimental_design are unique
experimental_design <- as.data.frame(experimental_design)
rownames(experimental_design) <- make.unique(rownames(experimental_design))

# Print the experimental design to verify
print(experimental_design)
##    label             condition replicate
## 1     1A RA_Metformin_Baseline         1
## 2     1B  RA_Metformin_4months         1
## 3     2A   RA_Control_Baseline         1
## 4     2B    RA_Control_4months         1
## 5     3A RA_Metformin_Baseline         2
## 6     3B  RA_Metformin_4months         2
## 7     4A      RA_Sham_Baseline         1
## 8     4B       RA_Sham_4months         1
## 9     5A RA_Metformin_Baseline         3
## 10    5B  RA_Metformin_4months         3
## 11    6A   RA_Control_Baseline         2
## 12    6B    RA_Control_4months         2
## 13    7A RA_Metformin_Baseline         4
## 14    7B  RA_Metformin_4months         4
## 15    8A   RA_Control_Baseline         3
## 16    8B    RA_Control_4months         3
## 17    9A RA_Metformin_Baseline         5
## 18    9B  RA_Metformin_4months         5
## 19   10A   RA_Control_Baseline         4
## 20   10B    RA_Control_4months         4
## 21   11A RA_Metformin_Baseline         6
## 22   11B  RA_Metformin_4months         6
## 23   12B    RA_Control_4months         5
## 24   13A RA_Metformin_Baseline         7
## 25   13B  RA_Metformin_4months         7
## 26   14A   RA_Control_Baseline         5
## 27   14B    RA_Control_4months         6
## 28   15A   RA_Control_Baseline         6
## 29   15B    RA_Control_4months         7
## 30   16A   RA_Control_Baseline         7
## 31   16B    RA_Control_4months         8
## 32   17A RA_Metformin_Baseline         8
## 33   17B  RA_Metformin_4months         8
## 34   18A      RA_Sham_Baseline         2
## 35   18B       RA_Sham_4months         2
## 36   19B  RA_Metformin_4months         9
## 37   20A   RA_Control_Baseline         8
## 38   20B    RA_Control_4months         9
## 39   22B    RA_Control_4months        10
## 40   23A RA_Metformin_Baseline         9
## 41   23B  RA_Metformin_4months        10
## 42   24A      RA_Sham_Baseline         3
## 43   24B       RA_Sham_4months         3
## 44   25A      RA_Sham_Baseline         4
## 45   25B       RA_Sham_4months         4
# Create the SummarizedExperiment Object
# This will allow DEP to use the data for further analysis and visualization
data_se <- make_se(data_unique, LFQ_columns, experimental_design)

Protein Coverage & filtrer on missing values

# Plot a barplot of the protein identification overlap between samples
plot_frequency(data_se)

plot_numbers(data_se)

plot_coverage(data_se)

# Filter for proteins that are identified in all replicates of at least one condition
#data_filt <- filter_missval(data_se, thr = 0)
#plot_frequency(data_filt)

# Plot a barplot of the number of identified proteins per samples
plot_numbers(data_se)

# Plot a barplot of the protein identification overlap between samples
plot_coverage(data_se)

## Normalizaiton

#Check un-normalized data
plot_normalization(data_se)

#VSN-normalization
data_se_norm <- normalize_vsn(data_se)

#Plot VSN-norm data
meanSdPlot(data_se_norm, rank = TRUE)

plot_normalization(data_se_norm)

###Verdict: After variance stabilisation, the median (a reasonable estimator of the standard deviation of feature level data conditional on the mean) is approximately a horizontal line.

Impute data for missing values

#TODO: Data imputation may not be necessary for your case. limma can handle missing values.

# Plot a heatmap of proteins with missing values
plot_missval(data_se_norm)

# Plot intensity distributions and cumulative fraction of proteins with and without missing values
plot_detect(data_se_norm)

#The nature of the missing-ness:  the proteins with missing values have low intensities on average. This data (MNAR and close to the detection limit) should be imputed by a left-censored imputation method, such as the quantile regression-based left-censored function (“QRILC”) or random draws from a left-shifted distribution (“MinProb” and “man”). 

# Impute missing data using random draws from a Gaussian distribution centered around a minimal value (for MNAR)
data_imp_MinProb <- impute(data_se_norm, fun = "MinProb", q = 0.01) 
## Imputing along margin 2 (samples/columns).
## [1] 0.4919121
# Impute missing data using random draws from a manually defined left-shifted Gaussian distribution (for MNAR)
data_imp_man <- impute( data_se_norm, fun = "man", shift = 1.8, scale = 0.3) 


# Impute missing data using Quantile Regression Imputation of Left-Censored Data (It’s particularly useful when missing values are expected to be low or below a detection limit.)
data_imp_qrilc <- impute( data_se_norm, fun = "QRILC") 
## Imputing along margin 2 (samples/columns).
# Plot intensity distributions before and after imputation
plot_imputation( data_se_norm, data_imp_MinProb, data_imp_man, data_imp_qrilc)

###Verdict: data_imp_Minprob seems to be  addressing MNAR the best, and will be used downstream. 

Differential abundance analysis using DEP

# Note: This analysis is intended as an extra quality control (QC) step.
# The main differential abundance analysis is performed using the `limma` package due to the more complex study design.
#Set contrasts
con <- c("RA_Control_4months_vs_RA_Sham_4months", "RA_Metformin_4months_vs_RA_Control_4months")

# Perform differential analysis using the specified contrasts - best on non-imputed data, honestly
data_diff <- test_diff(data_imp_qrilc, type = "manual", test = con)
## Tested contrasts: RA_Control_4months_vs_RA_Sham_4months, RA_Metformin_4months_vs_RA_Control_4months
# Denote significant proteins based on user defined cutoffs
dep <- add_rejections(data_diff, alpha = 0.05, lfc = log2(0))

# Generate a long data.frame
df_long <- get_df_long(dep)
df_wide <- get_df_wide(dep)

##Volcano plots and DAE using DEP

# Plot a volcano plot
plot_volcano(dep, contrast = "RA_Control_4months_vs_RA_Sham_4months", label_size = 2, add_names = TRUE)

plot_volcano(dep, contrast = "RA_Metformin_4months_vs_RA_Control_4months", label_size = 2, add_names = TRUE)

# Plot a frequency plot of significant proteins for the different conditions
plot_cond(dep)

# Generate a results table
data_results <- get_results(dep)

# Number of significant proteins
data_results %>% filter(significant) %>% nrow()
## [1] 9

MDS-plots on non-imputed data

#TODO: I would investigate the MDS plots without batch removal as well. 

#Log-transformation
logCounts <- filtered_count
#For PCA plot, we will remove the NAs - 
logCounts_noNA  <- na.omit(logCounts)

# Make DGE list and define design
annot_reordered <- annot[match(rownames(logCounts_noNA), annot$Genes), ]
d <- DGEList(counts = logCounts_noNA, genes = annot_reordered, samples = meta)
design <- model.matrix(~0 + Condition, d$samples)

# Remove batch
logCounts_batchRemoved <- removeBatchEffect(logCounts_noNA, 
                                            batch = as.factor(d$samples$Horse), 
                                            design = design)
## Coefficients not estimable: batch22 batch23
## Warning: Partial NA coefficients for 769 probe(s)
meta$Group <- factor(meta$Group, levels = names(publication_colors))

# Plot MDS
mds <- plotMDS(logCounts_batchRemoved, plot = FALSE)

dims <- list(p1 = c(1,2), p2 = c(1,3), p3 = c(2,3), p4 = c(1,4))
mds_plot <- list()

# Without labels
for (i in seq_along(dims)){
  mds_plot[[i]] <- aamisc::ggMDS(mds = mds,
                                 meta = d$samples, 
                                 dim = dims[[i]], 
                                 color.by = "Group", 
                                 shape.by = "Timepoint",
                                 legend.position = "right"
                                 ) + 
                  scale_color_manual(values = publication_colors)
}

plot1 <- patchwork::wrap_plots(mds_plot, ncol = 2) + patchwork::plot_layout(guides = 'collect')

# With labels
for (i in seq_along(dims)){
  mds_plot[[i]] <- aamisc::ggMDS(mds = mds,
                                 meta = d$samples, 
                                 dim = dims[[i]], 
                                 color.by = "Group", 
                                 shape.by = "Timepoint",
                                 legend.position = "right",
                                 text.by = "Horse",
                                 text.size = 1.5
                                 ) + 
                  scale_color_manual(values = publication_colors)
}

plot2 <- patchwork::wrap_plots(mds_plot, ncol = 2) + patchwork::plot_layout(guides = 'collect')

# Display the plots
print(plot1)

print(plot2)

Checking histograms and save/load imputations

# Histograms will be generated for four datasets:
# 1. `normalized_unimputed`: Normalized but not imputed data.
# 2. `imputed_data_MinProb`: Data imputed using the "MinProb" method.
# 3. `imputed_data_qrilc`: Data imputed using the "QRILC" method.
# 4. `unfiltered_count`: Raw log-transformed counts after initial filtering.

# Extract the assay data from data_se_norm (normalized_unimputed)
normalized_unimputed <- as.data.frame(assay(data_se_norm))
colnames(normalized_unimputed) <- colnames(filtered_count)
rownames(normalized_unimputed) <- rownames(data_se_norm)

# Extract the assay data from data_imp_MinProb (imputed)
imputed_data_MinProb <- as.data.frame(assay(data_imp_MinProb))
colnames(imputed_data_MinProb) <- colnames(filtered_count)
rownames(imputed_data_MinProb) <- rownames(data_se_norm)

# Extract the assay data from data_imp_qrilc
imputed_data_qrilc <- as.data.frame(assay(data_imp_qrilc))
colnames(imputed_data_qrilc) <- colnames(filtered_count)
rownames(imputed_data_qrilc) <- rownames(data_se_norm)

# Extract the assay data from data_imp_man
imputed_data_man <- as.data.frame(assay(data_imp_man))
colnames(imputed_data_man) <- colnames(filtered_count)
rownames(imputed_data_man) <- rownames(data_se_norm)

# Plot histogram for normalized data
par(mfrow = c(1, 4))
hist(as.vector(as.matrix(normalized_unimputed)), breaks = 50, main = "Normalized Data", xlab = "Intensity")
hist(as.vector(as.matrix(imputed_data_MinProb)), breaks = 50, main = "MinProb Imputed Data", xlab = "Intensity")
hist(as.vector(as.matrix(imputed_data_qrilc)), breaks = 50, main = "QRILC Imputed Data", xlab = "Intensity")
hist(as.vector(as.matrix(filtered_count)), breaks = 50, main = "Unfiltered Data", xlab = "Intensity")

# Define the output folder path
output_folder <- "../../../../Timecourse/analysis/01_dge/output/"

# Function to save a matrix with row names as a separate column
save_matrix <- function(matrix, file_path) {
  # Create a data frame from the matrix, including row names as a column
  matrix_df <- as.data.frame(matrix)
  matrix_df$GeneName <- rownames(matrix)
  fwrite(matrix_df, file = file_path, sep = "\t", row.names = FALSE)
}

# Function to load a matrix and restore row names
load_matrix <- function(file_path) {
  matrix_df <- fread(file_path, data.table = FALSE)
  rownames(matrix_df) <- matrix_df$GeneName
  matrix_df$GeneName <- NULL
  return(as.matrix(matrix_df))  # Convert back to a matrix
}

# Save the matrices
 # save_matrix(normalized_unimputed, paste0(output_folder, "normalized_unimputed.tsv"))
 # save_matrix(imputed_data_MinProb, paste0(output_folder, "imputed_data_MinProb.tsv"))
 # save_matrix(imputed_data_qrilc, paste0(output_folder, "imputed_data_qrilc.tsv"))

# Load the matrices in the future
normalized_unimputed <- load_matrix(paste0(output_folder, "normalized_unimputed.tsv"))
imputed_data_MinProb <- load_matrix(paste0(output_folder, "imputed_data_MinProb.tsv"))
imputed_data_qrilc <- load_matrix(paste0(output_folder, "imputed_data_qrilc.tsv"))

Limma on SVA Data

SVA Limma

#TODO: General comment: you may want to organize your sections in a better way. For example,
#      how about just naming this section "SVA Limma" and remove the section "Limma on SVA"

# Extract batch and biological condition information
batch <- d$samples$Batch  # Specify the batch variable
condition <- d$samples$Condition  # Specify the biological condition

#TODO: ComBat may not be appropriate here as it totally removes the "known" batch effect from your data
#TODO: Instead we want to model the "unknown" batch effect with SVA
# Run ComBat to correct for batch effects
combat_data <- ComBat(dat = imputed_data_MinProb, batch = batch, mod = model.matrix(~ condition))
## Found4batches
## Adjusting for5covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
# Check if the batch effect is removed using PCA or similar methods
# PCA to visualize the data before and after batch correction (optional)
# You can run a PCA analysis here to check the effectiveness of ComBat

# Proceed with SVA to adjust for hidden confounders
mod <- model.matrix(~ 0 + condition, data = d$samples)  # Full model
mod0 <- model.matrix(~ 1, data = d$samples)  # Null model

n.sv <- num.sv(combat_data, mod, method = "be")  # Estimate number of surrogate variables
svobj <- sva(combat_data, mod, mod0, n.sv = n.sv)
## Number of significant surrogate variables is:  8 
## Iteration (out of 5 ):1  2  3  4  5
# Adjust the design matrix to include surrogate variables
modSv <- cbind(mod, svobj$sv)

# Run duplicateCorrelation to account for repeated measures (e.g., "Horse" as a blocking factor)
corfit <- duplicateCorrelation(combat_data, modSv, block = as.factor(d$samples$Horse))

#TODO: Have a look at your consensus correlation, it is ~0, which means that there are no random
#TODO: effects in your data. This is because you removed the "batch" effect by ComBat. 
corfit$consensus.correlation
## [1] 0.0007225026
# Fit the linear model using limma, accounting for repeated measures
fit <- lmFit(combat_data, modSv, block = as.factor(d$samples$Horse), correlation = corfit$consensus.correlation)

#TODO: your data does not have the "batch" anymore as you removed it by combat. So, you
#TODO: should not model "Batch" again...
#TODO: On the other hand, you are not using this "design" anywhere. 
design <- model.matrix(~0 + Condition + Batch, d$samples)

# Ensure all column names in modSv are syntactically valid
colnames(modSv) <- make.names(colnames(modSv))

# Check the updated column names to use in the makeContrasts function
print(colnames(modSv))
##  [1] "conditionRA_Control_4months"    "conditionRA_Control_Baseline"  
##  [3] "conditionRA_Metformin_4months"  "conditionRA_Metformin_Baseline"
##  [5] "conditionRA_Sham_4months"       "conditionRA_Sham_Baseline"     
##  [7] "X"                              "X"                             
##  [9] "X"                              "X"                             
## [11] "X"                              "X"                             
## [13] "X"                              "X"
#TODO: fix the column names. They look like this at the moment: 
#  [1] "conditionRA_Control_4months"    "conditionRA_Control_Baseline"   "conditionRA_Metformin_4months" 
#  [4] "conditionRA_Metformin_Baseline" "conditionRA_Sham_4months"       "conditionRA_Sham_Baseline"     
#  [7] "X"                              "X"                              "X"                             
# [10] "X"                              "X"                              "X"                             
# [13] "X"                              "X"       

# Define contrasts with the updated column names
con <- makeContrasts(
  Metformin_vs_AF_RA = conditionRA_Metformin_4months - conditionRA_Control_4months,
  AF_vs_Sham_RA = conditionRA_Control_4months - conditionRA_Sham_4months,
  Terminal_vs_Baseline_Control = conditionRA_Control_4months - conditionRA_Control_Baseline,
  Terminal_vs_Baseline_Metformin = conditionRA_Metformin_4months - conditionRA_Metformin_Baseline,
  Diff_Treatment = (conditionRA_Metformin_4months - conditionRA_Metformin_Baseline) - (conditionRA_Control_4months - conditionRA_Control_Baseline),
  Diff_Disease = (conditionRA_Control_4months - conditionRA_Control_Baseline) - (conditionRA_Sham_4months - conditionRA_Sham_Baseline),
  Baseline_Difference_Metf_vs_AF = conditionRA_Metformin_Baseline - conditionRA_Control_Baseline,
  Baseline_Difference_AF_vs_Sham = conditionRA_Control_Baseline - conditionRA_Sham_Baseline,
  levels = modSv)
con
##                                 Contrasts
## Levels                           Metformin_vs_AF_RA AF_vs_Sham_RA
##   conditionRA_Control_4months                    -1             1
##   conditionRA_Control_Baseline                    0             0
##   conditionRA_Metformin_4months                   1             0
##   conditionRA_Metformin_Baseline                  0             0
##   conditionRA_Sham_4months                        0            -1
##   conditionRA_Sham_Baseline                       0             0
##   X                                               0             0
##   X                                               0             0
##   X                                               0             0
##   X                                               0             0
##   X                                               0             0
##   X                                               0             0
##   X                                               0             0
##   X                                               0             0
##                                 Contrasts
## Levels                           Terminal_vs_Baseline_Control
##   conditionRA_Control_4months                               1
##   conditionRA_Control_Baseline                             -1
##   conditionRA_Metformin_4months                             0
##   conditionRA_Metformin_Baseline                            0
##   conditionRA_Sham_4months                                  0
##   conditionRA_Sham_Baseline                                 0
##   X                                                         0
##   X                                                         0
##   X                                                         0
##   X                                                         0
##   X                                                         0
##   X                                                         0
##   X                                                         0
##   X                                                         0
##                                 Contrasts
## Levels                           Terminal_vs_Baseline_Metformin Diff_Treatment
##   conditionRA_Control_4months                                 0             -1
##   conditionRA_Control_Baseline                                0              1
##   conditionRA_Metformin_4months                               1              1
##   conditionRA_Metformin_Baseline                             -1             -1
##   conditionRA_Sham_4months                                    0              0
##   conditionRA_Sham_Baseline                                   0              0
##   X                                                           0              0
##   X                                                           0              0
##   X                                                           0              0
##   X                                                           0              0
##   X                                                           0              0
##   X                                                           0              0
##   X                                                           0              0
##   X                                                           0              0
##                                 Contrasts
## Levels                           Diff_Disease Baseline_Difference_Metf_vs_AF
##   conditionRA_Control_4months               1                              0
##   conditionRA_Control_Baseline             -1                             -1
##   conditionRA_Metformin_4months             0                              0
##   conditionRA_Metformin_Baseline            0                              1
##   conditionRA_Sham_4months                 -1                              0
##   conditionRA_Sham_Baseline                 1                              0
##   X                                         0                              0
##   X                                         0                              0
##   X                                         0                              0
##   X                                         0                              0
##   X                                         0                              0
##   X                                         0                              0
##   X                                         0                              0
##   X                                         0                              0
##                                 Contrasts
## Levels                           Baseline_Difference_AF_vs_Sham
##   conditionRA_Control_4months                                 0
##   conditionRA_Control_Baseline                                1
##   conditionRA_Metformin_4months                               0
##   conditionRA_Metformin_Baseline                              0
##   conditionRA_Sham_4months                                    0
##   conditionRA_Sham_Baseline                                  -1
##   X                                                           0
##   X                                                           0
##   X                                                           0
##   X                                                           0
##   X                                                           0
##   X                                                           0
##   X                                                           0
##   X                                                           0
# Apply contrasts and run eBayes
fit <- contrasts.fit(fit, con)
## Warning in contrasts.fit(fit, con): row names of contrasts don't match col
## names of coefficients
fit <- eBayes(fit, robust = TRUE, trend = TRUE)

# Extract DGE results using the BH method for FDR correction
res <- list()  # List to store DGE results
for (i in colnames(con)) {
  res_tmp <- topTable(fit, coef = i, adjust.method = "BH", number = Inf)  # Get top table results
  res_tmp <- res_tmp[!is.na(res_tmp$t), ]  # Remove rows with NA values
  res_tmp$Contrast <- i #TODO: replaced this part >>> rep(i, nrow(res_tmp))  # Store the contrast name
  res[[i]] <- res_tmp  # Add to the results list
  
  # Print the number of differentially expressed genes based on adjusted p-values
  n_adj_pval <- nrow(res_tmp[res_tmp$adj.P.Val < 0.05, ])
  print(paste('Number of differentially expressed genes for', i, 'based on adjusted p-value (BH) =', n_adj_pval))
}
## [1] "Number of differentially expressed genes for Metformin_vs_AF_RA based on adjusted p-value (BH) = 0"
## [1] "Number of differentially expressed genes for AF_vs_Sham_RA based on adjusted p-value (BH) = 10"
## [1] "Number of differentially expressed genes for Terminal_vs_Baseline_Control based on adjusted p-value (BH) = 504"
## [1] "Number of differentially expressed genes for Terminal_vs_Baseline_Metformin based on adjusted p-value (BH) = 499"
## [1] "Number of differentially expressed genes for Diff_Treatment based on adjusted p-value (BH) = 1"
## [1] "Number of differentially expressed genes for Diff_Disease based on adjusted p-value (BH) = 25"
## [1] "Number of differentially expressed genes for Baseline_Difference_Metf_vs_AF based on adjusted p-value (BH) = 2"
## [1] "Number of differentially expressed genes for Baseline_Difference_AF_vs_Sham based on adjusted p-value (BH) = 90"
# Combine all results into a single data frame
res_all <- do.call(rbind, res)

# Map Gene Names Manually
res_all$GeneName <- sapply(seq_len(nrow(res_all)), function(i) {
  gsub(paste0("^", res_all$Contrast[i], "\\."), "", rownames(res_all)[i])
})

# Split results by contrast for easier output
res_split <- split(res_all, res_all$Contrast)

# Optionally, save the results to files
# openxlsx::write.xlsx(x = res_split, file = "../../../../Timecourse/analysis/01_dge/output/dge_results_sva_combat.xlsx", asTable = TRUE)
# data.table::fwrite(x = res_all, file = "../../../../Timecourse/analysis/01_dge/output/dge_results_sva.tsv.gz", sep = "\t")

# Loop through each data frame in res_split to create p-value histograms
for (contrast in names(res_split)) {
  # Extract the P.Values for the current contrast
  p_values <- res_split[[contrast]]$P.Value
  
  # Create a histogram using ggplot2
  ggplot(data = data.frame(P.Value = p_values), aes(x = P.Value)) +
    geom_histogram(breaks = seq(0, 1, by = 0.05), closed = "right", #TODO: FYI, better histogram view
                   color = "black", fill = "lightblue") +
    theme_minimal() +
    labs(
      title = paste("P-value Histogram for", contrast),
      x = "P-value",
      y = "Frequency"
    ) +
    xlim(0, 1) # Set x-axis limits to be between 0 and 1
  
  # Print each histogram
  print(last_plot())
}

Volcano plots

volcano_plots <- list()
for (i in names(res)){
  volcano_plots[[i]] <- ggVolcano(x = res[[i]], 
                                  fdr = 0.05,
                                  fdr.column = "adj.P.Val", 
                                  pvalue.column = "P.Value", 
                                  logFC = 0, 
                                  logFC.column = "logFC", 
                                  text.size = 2) + 
    theme_bw(base_size = 10) + 
    ggtitle(i)
}
## Warning in max(x[get(fdr.column) <= .fdr][, get(pvalue.column)]): no
## non-missing arguments to max; returning -Inf
patchwork::wrap_plots(volcano_plots, ncol = 3)

Better visualization of Volcano plots

library(ggrepel)

# Create a named vector for ENSEMBL to Genes mapping
ensembl_to_Genes <- setNames(annot_reordered$Genes, annot_reordered$ENSEMBL)

# Map ENSEMBL IDs to Gene Names in the `res` list
for (contrast_name in names(res)) {
  # Ensure the dataframe has ENSEMBL IDs as rownames
  if (!"ENSEMBL" %in% colnames(res[[contrast_name]])) {
    res[[contrast_name]]$ENSEMBL <- rownames(res[[contrast_name]])
  }
  
  # Map Gene Names using the annotation
  res[[contrast_name]]$Genes <- ensembl_to_Genes[res[[contrast_name]]$ENSEMBL]
  
  # Replace NA values in Genes with ENSEMBL IDs (to ensure plotting works even if some gene names are missing)
  res[[contrast_name]]$Genes[is.na(res[[contrast_name]]$Genes)] <- res[[contrast_name]]$ENSEMBL[is.na(res[[contrast_name]]$Genes)]
}

# Load the volcano plot helper function
source("volcano_helpers.R")

# Create lists for volcano plots with and without labels
volcano_plots_no_labels <- list()
volcano_plots_with_labels <- list()

# Iterate over each contrast and create volcano plots
for (contrast_name in names(res)) {
  # Ensure the Genes column is present for labeling
  if (!"Genes" %in% colnames(res[[contrast_name]])) {
    res[[contrast_name]]$Genes <- sapply(rownames(res[[contrast_name]]), function(x) gsub(".*_", "", x))
  }
  
  # Generate volcano plots using the helper function
  volcano_plots <- create_custom_volcano_plot(
    df = res[[contrast_name]],
    logFC_col = "logFC",
    pvalue_col = "P.Value",
    adj_pvalue_col = "adj.P.Val",
    contrast_name = contrast_name,
    fc_cutoff = 0,  # Set fold-change cutoff for significance
    pvalue_cutoff = 0.05,  # Set p-value cutoff
    save_plot = TRUE, 
    output_path = "../output/",  # Adjust output path if needed
    show_labels = TRUE  # Generate both labeled and unlabeled plots
  )
  
  # Store the plots
  volcano_plots_no_labels[[contrast_name]] <- volcano_plots$No_Labels
  volcano_plots_with_labels[[contrast_name]] <- volcano_plots$With_Labels
}
## Warning: ggrepel: 412 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 412 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## Warning: ggrepel: 21 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Combine and display volcano plots without labels
combined_volcano_no_labels <- patchwork::wrap_plots(volcano_plots_no_labels, ncol = 3)

# Combine and display volcano plots with labels
combined_volcano_with_labels <- patchwork::wrap_plots(volcano_plots_with_labels, ncol = 3)

# Display the combined volcano plots
print(combined_volcano_no_labels)

print(combined_volcano_with_labels)
## Warning: ggrepel: 502 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 496 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 87 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Print individual volcano plots with labels for key contrasts
print(volcano_plots_with_labels[["Diff_Treatment"]])

print(volcano_plots_with_labels[["Diff_Disease"]])

ggsave("../output/volcano_Diff_Treatment_for_figure.png", (volcano_plots_with_labels[["Diff_Treatment"]]), 
       dpi = 600, width = 4, height = 3, units = "in")

Plotting One

Understanding Treatment Direction and Plotting Protein Abundance

# Understanding LogFC for "Diff_Treatment"
# Positive LogFC indicates:
# 1) Metformin group shows a relatively higher level compared to the control group.
# 2) This can occur if:
#    a) Metformin increases and control decreases.
#    b) Both increase, but metformin shows a greater increase.
#    c) Both decrease, but metformin shows a smaller decrease.

# Negative LogFC indicates:
# 1) Metformin group shows a relatively lower level compared to the control group.
# 2) This can occur if:
#    a) Metformin decreases and control increases.
#    b) Both decrease, but metformin shows a greater decrease.
#    c) Both increase, but metformin shows a smaller increase.

# Load ggpubr for enhanced visualization
library(ggpubr)

# Step 1: Define the function to create a violin plot with individual points and mean line
plot_protein_counts <- function(protein_of_interest) {
  # Check if the protein is present in the dataset
  if (protein_of_interest %in% rownames(data_se_norm)) {
    # Extract the normalized counts for the protein
    protein_counts <- assay(data_se_norm)[protein_of_interest,]
    
    # Create a data frame for plotting
    protein_df <- data.frame(
      SampleID = colnames(data_se_norm), # Extract sample IDs
      Count = protein_counts,            # Protein counts
      Condition = meta$Condition         # Experimental conditions
    )
    
    # Generate violin plot with jitter points and mean line
    p <- ggplot(protein_df, aes(x = Condition, y = Count, fill = Condition)) +
      geom_violin(trim = FALSE, alpha = 0.5) +                        # Violin plot
      geom_jitter(width = 0.2, size = 2, alpha = 0.7) +               # Add individual points
      stat_summary(fun = mean, geom = "crossbar", width = 0.5, fatten = 2, color = "red") +  # Mean line
      labs(
        title = paste("Violin plot of normalized counts for", protein_of_interest),  # Plot title
        x = "Condition", y = "Normalized Abundance"                                  # Axis labels
      ) +
      theme_pubr() +                                                  # Apply a clean theme
      scale_fill_brewer(palette = "Set3")                             # Color palette

    print(p)  # Display the plot
  } else {
    # Output message if protein is not found
    cat("The protein", protein_of_interest, "is not present in the dataset.\n")
  }
}

# Step 2: Visualize selected proteins with significant treatment effects to understand, why it is upregulated and downregulated (see comment in beginning of chunk)

# A) Electron Transport Chain Genes - negatively regulated (blue) in "Diff_Treatment"
plot_protein_counts("NDUFA6")   # Associated with metformin in studies related to Aortic Aneurysms, increases less in metformin group

plot_protein_counts("ATP5F1C")  # Important candidate gene for treatment effects, small decrease in metformin group

plot_protein_counts("KARS1")    # Downregulated in response to treatment 

# B) Proteasome Proteins - negatively regulated (blue) in "Diff_Treatment"
plot_protein_counts("PSMC5")    # Protein linked to proteasome function

plot_protein_counts("PSMC4")    # Another candidate for proteasome-associated mechanisms

plot_protein_counts("PSMD11")   # Related to proteasome degradation processes

# C) Heat Shock Proteins (Hsp90 Family) - negatively regulated (blue) in "Diff_Treatment"
plot_protein_counts("SUGT1")    # Chaperone activity protein
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

plot_protein_counts("HSP90AA1") # Hsp90 protein linked to stress response

plot_protein_counts("DYNC1H1")  # Motor protein involved in cellular transport

# D) Detoxification of Reactive Oxygen Species (ROS) - positively regulated (red) in "Diff_Treatment"
plot_protein_counts("TXNRD2")   # Key enzyme in ROS detoxification

plot_protein_counts("TXN")      # Thioredoxin, involved in redox regulation

plot_protein_counts("SOD3")     # Superoxide dismutase, a primary ROS scavenger

# Additional Proteins of Interest
plot_protein_counts("DDAH1")    # Dimethylarginine Dimethylaminohydrolase 1

plot_protein_counts("COQ8A")    # Coenzyme Q8 homolog

plot_protein_counts("RICTOR")   # Component of mTOR complex
## The protein RICTOR is not present in the dataset.
plot_protein_counts("YWHAE")    # 14-3-3 protein epsilon, signaling protein

plot_protein_counts("TXNDC5")   # Protein disulfide isomerase, ROS response

plot_protein_counts("GNAI2")

Focus on GNAI2

# Define the function to create a violin plot with individual points and mean line
plot_protein_counts <- function(protein_of_interest) {
  # Check if the protein is present in the dataset
  if (protein_of_interest %in% rownames(data_se_norm)) {
    # Extract the normalized counts for the protein
    protein_counts <- assay(data_se_norm)[protein_of_interest,]
    
    # Create a data frame for plotting
    protein_df <- data.frame(
      SampleID = colnames(data_se_norm), # Extract sample IDs
      Count = protein_counts,            # Protein counts
      Condition = meta$Condition         # Experimental conditions
    )
    
    # Generate violin plot with jitter points and mean line
    p <- ggplot(protein_df, aes(x = Condition, y = Count, fill = Condition)) +
      geom_violin(trim = FALSE, alpha = 0.7, color = "black") +       # Violin plot with border
      geom_jitter(width = 0.15, size = 1.5, alpha = 0.6, color = "black") +  # Individual points
      stat_summary(fun = mean, geom = "crossbar", width = 0.5, fatten = 2, color = "darkred") +  # Mean line
      labs(
        title = paste("Normalized Protein Counts for", protein_of_interest),  # Plot title
        x = "Condition", y = "Normalized Abundance"                           # Axis labels
      ) +
      theme_pubr() +                                                          # Apply a clean, publication-ready theme
      scale_fill_brewer(palette = "Set3") +                                   # Use a color palette with enough colors
      theme(
        plot.title = element_text(size = 14, face = "bold", hjust = 0.5),     # Centered and bold title
        axis.title = element_text(size = 12, face = "bold"),                  # Bold axis titles
        axis.text = element_text(size = 10, color = "black"),                 # Custom axis text
        legend.position = "none"                                              # Remove legend
      )
    
    # Print the plot
    print(p)
  } else {
    # Output message if the protein is not found
    cat("The protein", protein_of_interest, "is not present in the dataset.\n")
  }
}

# Plot the protein counts for "GNAI2"
plot_protein_counts("GNAI2")

MDS plots and SVA

# Transform raw count data and remove NA values
logCounts_noNA <- na.omit(logCounts)

# Create DGEList object and define the design matrix
annot_reordered <- annot[match(rownames(logCounts_noNA), annot$Genes), ]
d <- DGEList(counts = logCounts_noNA, genes = annot_reordered, samples = meta)
design <- model.matrix(~0 + Condition, data = d$samples)

# 1. MDS Plot for Raw Data
mds_raw <- plotMDS(logCounts_noNA, plot = FALSE)
mds_plot_raw <- aamisc::ggMDS(mds = mds_raw, meta = d$samples, color.by = "Group", shape.by = "Timepoint", legend.position = "right") +
  scale_color_manual(values = publication_colors) +
  labs(title = "MDS: Raw Data", x = "MDS Dimension 1", y = "MDS Dimension 2") +
  theme(legend.title = element_text(face = "bold"), plot.title = element_text(face = "bold", hjust = 0.5))

# 2. MDS Plot for Data Adjusted by Blocking for Horse
logCounts_blocked <- removeBatchEffect(logCounts_noNA, batch = as.factor(d$samples$Horse), design = design)
## Coefficients not estimable: batch22 batch23
## Warning: Partial NA coefficients for 769 probe(s)
mds_blocked <- plotMDS(logCounts_blocked, plot = FALSE)
mds_plot_blocked <- aamisc::ggMDS(mds = mds_blocked, meta = d$samples, color.by = "Group", shape.by = "Timepoint", legend.position = "right") +
  scale_color_manual(values = publication_colors) +
  labs(title = "MDS: Blocked for Horse", x = "MDS Dimension 1", y = "MDS Dimension 2") +
  theme(legend.title = element_text(face = "bold"), plot.title = element_text(face = "bold", hjust = 0.5))

# 3. MDS Plot for Data Adjusted Using ComBat
combat_data <- ComBat(dat = logCounts_noNA, batch = as.factor(d$samples$Batch), mod = model.matrix(~Condition, data = d$samples))
## Found4batches
## Adjusting for5covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
mds_combat <- plotMDS(combat_data, plot = FALSE)
mds_plot_combat <- aamisc::ggMDS(mds = mds_combat, meta = d$samples, color.by = "Group", shape.by = "Timepoint", legend.position = "right") +
  scale_color_manual(values = publication_colors) +
  labs(title = "MDS: ComBat Adjusted", x = "MDS Dimension 1", y = "MDS Dimension 2") +
  theme(legend.title = element_text(face = "bold"), plot.title = element_text(face = "bold", hjust = 0.5))

# 4. MDS Plot for Data Adjusted Using SVA
mod <- model.matrix(~0 + Condition, data = d$samples)
mod0 <- model.matrix(~1, data = d$samples)
n.sv <- num.sv(logCounts_noNA, mod, method = "be")
logCounts_noNA <- as.matrix(logCounts_noNA)
storage.mode(logCounts_noNA) <- "numeric"
svobj <- sva(logCounts_noNA, mod, mod0, n.sv = n.sv)
## Number of significant surrogate variables is:  2 
## Iteration (out of 5 ):1  2  3  4  5
modSv <- cbind(mod, svobj$sv)
logCounts_sva <- removeBatchEffect(logCounts_noNA, batch = d$samples$Horse, design = modSv)
## Coefficients not estimable: batch22 batch23
## Warning: Partial NA coefficients for 769 probe(s)
mds_sva <- plotMDS(logCounts_sva, plot = FALSE)
mds_plot_sva <- aamisc::ggMDS(mds = mds_sva, meta = d$samples, color.by = "Group", shape.by = "Timepoint", legend.position = "right") +
  scale_color_manual(values = publication_colors) +
  labs(title = "MDS: SVA Adjusted", x = "MDS Dimension 1", y = "MDS Dimension 2") +
  theme(legend.title = element_text(face = "bold"), plot.title = element_text(face = "bold", hjust = 0.5))

# 5. MDS Plot for Data Adjusted Using SVA (with Labels)
mds_plot_sva_labelled <- aamisc::ggMDS(mds = mds_sva, meta = d$samples, color.by = "Group", shape.by = "Timepoint", legend.position = "right", text.by = "Horse", text.size = 1.5) +
  scale_color_manual(values = publication_colors) +
  labs(title = "MDS: SVA Adjusted (Labelled)", x = "MDS Dimension 1", y = "MDS Dimension 2") +
  theme(legend.title = element_text(face = "bold"), plot.title = element_text(face = "bold", hjust = 0.5))

# Display the plots
print(mds_plot_raw)

print(mds_plot_combat)

print(mds_plot_blocked)

print(mds_plot_sva)

print(mds_plot_sva_labelled)

Session Info

#TODO: FYI: It is important to have this section for reproducibility
sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.0.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] C
## 
## time zone: Europe/Copenhagen
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggpubr_0.6.0                ggrepel_0.9.6              
##  [3] sva_3.54.0                  BiocParallel_1.39.0        
##  [5] genefilter_1.88.0           mgcv_1.9-1                 
##  [7] nlme_3.1-166                aamisc_0.1.5               
##  [9] fdrtool_1.2.18              Metrics_0.1.4              
## [11] SummarizedExperiment_1.36.0 Biobase_2.66.0             
## [13] GenomicRanges_1.58.0        GenomeInfoDb_1.42.0        
## [15] IRanges_2.40.0              S4Vectors_0.44.0           
## [17] BiocGenerics_0.52.0         MatrixGenerics_1.18.0      
## [19] DEP_1.28.0                  preprocessCore_1.68.0      
## [21] DEqMS_1.24.0                matrixStats_1.4.1          
## [23] RColorBrewer_1.1-3          missForest_1.5             
## [25] dplyr_1.1.4                 openxlsx_4.2.7.1           
## [27] patchwork_1.3.0             data.table_1.16.2          
## [29] ggplot2_3.5.1               stringr_1.5.1              
## [31] tibble_3.2.1                magrittr_2.0.3             
## [33] biomaRt_2.62.0              readxl_1.4.3               
## [35] readr_2.1.5                 edgeR_4.4.0                
## [37] limma_3.62.1                pacman_0.5.1               
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.4.2               later_1.3.2                
##   [3] norm_1.0-11.1               filelock_1.0.3             
##   [5] R.oo_1.27.0                 cellranger_1.1.0           
##   [7] XML_3.99-0.17               lifecycle_1.0.4            
##   [9] httr2_1.0.6                 rstatix_0.7.2              
##  [11] doParallel_1.0.17           vroom_1.6.5                
##  [13] lattice_0.22-6              MASS_7.3-61                
##  [15] MultiAssayExperiment_1.32.0 backports_1.5.0            
##  [17] sass_0.4.9                  rmarkdown_2.29             
##  [19] jquerylib_0.1.4             yaml_2.3.10                
##  [21] httpuv_1.6.15               doRNG_1.8.6                
##  [23] zip_2.3.1                   MsCoreUtils_1.18.0         
##  [25] DBI_1.2.3                   abind_1.4-8                
##  [27] zlibbioc_1.52.0             R.utils_2.12.3             
##  [29] purrr_1.0.2                 HarmonicRegression_1.0     
##  [31] AnnotationFilter_1.30.0     itertools_0.1-3            
##  [33] rappdirs_0.3.3              sandwich_3.1-1             
##  [35] circlize_0.4.16             GenomeInfoDbData_1.2.13    
##  [37] annotate_1.84.0             MSnbase_2.31.1             
##  [39] ncdf4_1.23                  codetools_0.2-20           
##  [41] DelayedArray_0.32.0         DT_0.33                    
##  [43] xml2_1.3.6                  tidyselect_1.2.1           
##  [45] gmm_1.8                     shape_1.4.6.1              
##  [47] UCSC.utils_1.2.0            farver_2.1.2               
##  [49] gmp_0.7-5                   BiocFileCache_2.14.0       
##  [51] jsonlite_1.8.9              multtest_2.62.0            
##  [53] GetoptLong_1.0.5            Formula_1.2-5              
##  [55] survival_3.7-0              iterators_1.0.14           
##  [57] systemfonts_1.1.0           foreach_1.5.2              
##  [59] tools_4.4.2                 progress_1.2.3             
##  [61] ragg_1.3.3                  Rcpp_1.0.13-1              
##  [63] glue_1.8.0                  gridExtra_2.3              
##  [65] SparseArray_1.6.0           xfun_0.49                  
##  [67] qvalue_2.38.0               shinydashboard_0.7.2       
##  [69] withr_3.0.2                 BiocManager_1.30.25        
##  [71] fastmap_1.2.0               fansi_1.0.6                
##  [73] digest_0.6.37               mime_0.12                  
##  [75] R6_2.5.1                    textshaping_0.4.0          
##  [77] imputeLCMD_2.1              colorspace_2.1-1           
##  [79] RSQLite_2.3.8               R.methodsS3_1.8.2          
##  [81] hexbin_1.28.4               utf8_1.2.4                 
##  [83] tidyr_1.3.1                 generics_0.1.3             
##  [85] prettyunits_1.2.0           PSMatch_1.10.0             
##  [87] httr_1.4.7                  htmlwidgets_1.6.4          
##  [89] S4Arrays_1.6.0              pkgconfig_2.0.3            
##  [91] NISTunits_1.0.1             gtable_0.3.6               
##  [93] blob_1.2.4                  ComplexHeatmap_2.22.0      
##  [95] impute_1.80.0               XVector_0.46.0             
##  [97] htmltools_0.5.8.1           carData_3.0-5              
##  [99] rain_1.40.0                 MALDIquant_1.22.3          
## [101] ProtGenerics_1.38.0         clue_0.3-65                
## [103] scales_1.3.0                tmvtnorm_1.6               
## [105] png_0.1-8                   knitr_1.49                 
## [107] rstudioapi_0.17.1           tzdb_0.4.0                 
## [109] reshape2_1.4.4              rjson_0.2.23               
## [111] curl_6.0.0                  cachem_1.1.0               
## [113] zoo_1.8-12                  GlobalOptions_0.1.2        
## [115] parallel_4.4.2              AnnotationDbi_1.68.0       
## [117] mzID_1.44.0                 vsn_3.74.0                 
## [119] pillar_1.9.0                grid_4.4.2                 
## [121] vctrs_0.6.5                 pcaMethods_1.98.0          
## [123] promises_1.3.0              randomForest_4.7-1.2       
## [125] car_3.1-3                   dbplyr_2.5.0               
## [127] xtable_1.8-4                cluster_2.1.6              
## [129] evaluate_1.0.1              mvtnorm_1.3-2              
## [131] cli_3.6.3                   locfit_1.5-9.10            
## [133] compiler_4.4.2              rlang_1.1.4                
## [135] crayon_1.5.3                rngtools_1.5.2             
## [137] ggsignif_0.6.4              labeling_0.4.3             
## [139] QFeatures_1.16.0            affy_1.84.0                
## [141] plyr_1.8.9                  stringi_1.8.4              
## [143] assertthat_0.2.1            munsell_0.5.1              
## [145] Biostrings_2.74.0           lazyeval_0.2.2             
## [147] Matrix_1.7-1                hms_1.1.3                  
## [149] bit64_4.5.2                 KEGGREST_1.46.0            
## [151] statmod_1.5.0               shiny_1.9.1                
## [153] mzR_2.40.0                  broom_1.0.7                
## [155] igraph_2.1.1                memoise_2.0.1              
## [157] affyio_1.76.0               bslib_0.8.0                
## [159] bit_4.5.0